*****************************************************************************
* Figure 4: The Policy Consequences of Postal Voting in Close Referendums (Individual-level Data)
*****************************************************************************
	
	* Start from data4.dta (individual-level records, one row per respondent
	* and vote -- presumably the VOX survey; confirm against the data source).
	use data4.dta, clear

	* (i) Merge administrative data
	*     Several rows share the same vote identifier anr, hence m:1.
	*     merge also adds the variable _merge to the data.

	merge m:1 anr using data5.dta

	* (ii) Generate VOX yes share and keep only one observation per vote
	*      votechoice_mean : mean of votechoice within each vote (anr)
	*      indi_anr        : running row number within vote; indi_anr==1 is
	*                        used further below to keep a single row per vote

	bysort anr: egen votechoice_mean = mean(votechoice)
	bysort anr: gen indi_anr = _n

	* Condition identifying close votes: volkjaproz between 46 and 54 and
	* year between 1981 and 2009.
	local closevote inrange(volkjaproz,46,54) & inrange(year,1981,2009)

	* Optional interactive inspection of the close votes:
	*br year month day anr volkjaproz titel if inrange(volkjaproz,46,54) & inrange(year,1981,2009) & indi_anr==1

	tab anr if `closevote'
	tab votechoice_mean if `closevote' & indi_anr==1

	* (iii) Generate VOX yes share by political interest and income
	*
	* For each combination of group_interest_high (0, 1) and
	* group_income_high (0, 1, 2) the mean of votechoice is computed within
	* vote (anr) on the rows of that cell and then copied to every row of the
	* vote. Result: votechoice_[interest]_[income], where interest is low/high
	* and income is low/middle/high.

	local interest_names low high
	local income_names   low middle high

	local cell = 0
	forvalues i = 0/1 {
		local int_name : word `=`i'+1' of `interest_names'
		forvalues j = 0/2 {
			local inc_name : word `=`j'+1' of `income_names'
			local ++cell
			* cell mean, non-missing only on the rows that belong to the cell
			bysort anr: egen help`cell' = mean(votechoice) if group_interest_high==`i' & group_income_high==`j'
			* spread the cell mean to all rows of the vote
			bysort anr: egen votechoice_`int_name'_`inc_name' = mean(help`cell')
		}
	}

	* Mean of postal_all within each vote.
	bysort anr: egen postal_byanr = mean(postal_all)

	drop help*

	* Constant 1, summed further below to count rows.
	gen ones = 1
	
	* (iv) Generate turnout share by political interest and income
	*
	* Within each vote (anr):
	*   share_[interest]_[income] =
	*       number of rows with turnout==1 in the interest x income cell
	*     / number of rows with turnout==1 and both group variables non-missing

	* Denominator; non-missing only on rows with turnout==1 and both groups known.
	bysort anr: egen help_denom = sum(ones) if turnout==1 & !missing(group_interest_high) & !missing(group_income_high)

	local interest_names low high
	local income_names   low middle high

	local cell = 0
	forvalues i = 0/1 {
		local int_name : word `=`i'+1' of `interest_names'
		forvalues j = 0/2 {
			local inc_name : word `=`j'+1' of `income_names'
			local ++cell
			* number of voters in the cell (non-missing on the cell's rows only)
			bysort anr: egen help_n`cell' = sum(ones) if group_interest_high==`i' & group_income_high==`j' & turnout==1
			* copy the cell count to every row of the vote
			bysort anr: egen help_all`cell' = mean(help_n`cell')
			* ratio is missing wherever the denominator is missing
			gen help_ratio`cell' = help_all`cell'/help_denom
			* copy the ratio to every row of the vote
			bysort anr: egen share_`int_name'_`inc_name' = mean(help_ratio`cell')
		}
	}

	drop help*
	
	* (v) Generate VOX yes share by income
	*
	* Produces, constant within vote (anr):
	*   votechoice_income_[low|middle|high] : mean of votechoice in the income group
	*   share_income_[low|middle|high]      : group's share among rows with
	*                                         turnout==1 and non-missing income group
	*   share_income_*_pv / share_income_*_npv : shares under postal / non-postal voting
	*
	* The earlier version also computed counts and ratios restricted to
	* postal_all==0; they were never used afterwards and are omitted here.

	local income_names low middle high

	* Yes share by income group
	forvalues j = 0/2 {
		local inc_name : word `=`j'+1' of `income_names'
		* group mean, non-missing only on the rows of the group
		bysort anr: egen help`j' = mean(votechoice) if group_income_high==`j'
		* spread it to every row of the vote
		bysort anr: egen votechoice_income_`inc_name' = mean(help`j')
	}

	drop help*

	* Share of each income group among voters
	bysort anr: egen help_denom = sum(ones) if turnout==1 & !missing(group_income_high)

	forvalues j = 0/2 {
		local inc_name : word `=`j'+1' of `income_names'
		bysort anr: egen help_n`j' = sum(ones) if group_income_high==`j' & turnout==1
		bysort anr: egen help_all`j' = mean(help_n`j')
		gen help_ratio`j' = help_all`j'/help_denom
		bysort anr: egen share_income_`inc_name' = mean(help_ratio`j')
	}

	* Shares under postal (pv) and non-postal (npv) voting.
	* With gap d between the pv and npv share, pv = share + d*(1-postal_byanr)
	* and npv = share - d*postal_byanr, so that
	* postal_byanr*pv + (1-postal_byanr)*npv equals the observed share.
	* d is 0.04 for high income and 0.01 for middle income
	* (NOTE(review): presumably estimated elsewhere -- confirm the source).
	* The low-income share is the residual so that the three shares sum to one.
	gen share_income_high_pv=share_income_high+(0.04*(1-postal_byanr))
	gen share_income_high_npv=share_income_high-(0.04*(postal_byanr))
	gen share_income_middle_pv=share_income_middle+(0.01*(1-postal_byanr))
	gen share_income_middle_npv=share_income_middle-(0.01*(postal_byanr))
	gen share_income_low_pv=1-share_income_high_pv-share_income_middle_pv
	gen share_income_low_npv=1-share_income_high_npv-share_income_middle_npv

	drop help*
	
	* (vi) Generate VOX yes share by political interest
	*
	* Produces, constant within vote (anr):
	*   votechoice_interest_[low|high] : mean of votechoice in the interest group
	*   share_interest_[low|high]      : group's share among rows with
	*                                    turnout==1 and non-missing interest group
	*   share_interest_*_pv / share_interest_*_npv : shares under postal / non-postal voting
	*
	* The earlier version also computed counts and ratios restricted to
	* postal_all==0; they were never used afterwards and are omitted here.
	* Helper variables are now dropped at the end of the section.

	local interest_names low high

	* Yes share by interest group
	forvalues i = 0/1 {
		local int_name : word `=`i'+1' of `interest_names'
		* group mean, non-missing only on the rows of the group
		bysort anr: egen help`i' = mean(votechoice) if group_interest_high==`i'
		* spread it to every row of the vote
		bysort anr: egen votechoice_interest_`int_name' = mean(help`i')
	}

	drop help*

	* Overall share of low/high interest individuals in voting population
	bysort anr: egen help_denom = sum(ones) if turnout==1 & !missing(group_interest_high)

	forvalues i = 0/1 {
		local int_name : word `=`i'+1' of `interest_names'
		bysort anr: egen help_n`i' = sum(ones) if group_interest_high==`i' & turnout==1
		bysort anr: egen help_all`i' = mean(help_n`i')
		gen help_ratio`i' = help_all`i'/help_denom
		bysort anr: egen share_interest_`int_name' = mean(help_ratio`i')
	}

	* Shares under postal (pv) and non-postal (npv) voting: the low-interest
	* share differs by 0.04 between the two regimes
	* (NOTE(review): presumably estimated elsewhere -- confirm the source),
	* split so that postal_byanr*pv + (1-postal_byanr)*npv equals the observed
	* share. The high-interest share is the residual.
	gen share_interest_low_pv=share_interest_low+(0.04*(1-postal_byanr))
	gen share_interest_low_npv=share_interest_low-(0.04*postal_byanr)
	gen share_interest_high_pv=1-share_interest_low_pv
	gen share_interest_high_npv=1-share_interest_low_npv

	drop help*
	
	* (vii) Keep only close votes
	*       A vote is close when volkjaproz lies between 46 and 54 and the
	*       year between 1981 and 2009. Only the row with indi_anr==1 of each
	*       such vote is kept, i.e. one row per vote.

	gen close_election = inrange(volkjaproz,46,54) & inrange(year,1981,2009)
	keep if close_election==1 & indi_anr==1
	keep anr year day month titel volkjaproz close_election votechoice_* share* postal_byanr

	* (viii) Generate yes share in vox data and compare it to real yes share
	*        volkjaproz_vox: sum over the six interest x income cells of
	*        (cell share among voters) x (cell yes share), times 100.

	//gen votechoice_agg_observed=(share_interest_low*votechoice_interest_low+share_interest_high*votechoice_interest_high)*100
	gen volkjaproz_vox = ( share_low_low*votechoice_low_low ///
		+ share_low_middle*votechoice_low_middle ///
		+ share_low_high*votechoice_low_high ///
		+ share_high_low*votechoice_high_low ///
		+ share_high_middle*votechoice_high_middle ///
		+ share_high_high*votechoice_high_high )*100
	*gen diff_volkja_vox_real=volkjaproz_vox-volkjaproz

	//gen volkja_vox_corrected=volkjaproz_vox-diff_volkja_vox_real
	
	* (ix) Generate share of high and low interest voters in the population of actual voters under postal and non-postal voting
	*
	* For each interest x income cell with observed share s and gap d between
	* the postal (pv) and non-postal (npv) share:
	*     share_pv  = s + d*(1-postal_byanr)
	*     share_npv = s - d*postal_byanr
	* so that postal_byanr*share_pv + (1-postal_byanr)*share_npv = s.
	* The gaps d per cell are listed below (NOTE(review): presumably estimated
	* elsewhere -- confirm the source of these values).
	*
	* Two fixes relative to the earlier version:
	*   1. The pv shares of high_low and high_middle were scaled by
	*      postal_byanr instead of (1-postal_byanr), unlike every other pv
	*      share in this file; they now follow the formula above.
	*      NOTE(review): this changes the simulated shares -- confirm.
	*   2. The residual high_high shares subtracted share_low_low twice and
	*      never subtracted share_low_middle, so the six shares did not sum
	*      to one; share_low_middle is now subtracted.

	local cells  low_low low_middle low_high high_low high_middle
	local deltas 0.01    0.01       -0.01    -0.01    -0.02

	forvalues c = 1/5 {
		local cell  : word `c' of `cells'
		local delta : word `c' of `deltas'
		gen share_`cell'_npv = share_`cell' - (`delta'*postal_byanr)
		gen share_`cell'_pv  = share_`cell' + (`delta'*(1-postal_byanr))
	}

	* high_high is the residual cell: the six shares sum to one in each regime.
	foreach regime in npv pv {
		gen share_high_high_`regime' = 1 - share_low_low_`regime' ///
			- share_low_middle_`regime' ///
			- share_low_high_`regime' ///
			- share_high_low_`regime' ///
			- share_high_middle_`regime'
	}
	
	
	* (x) Generate aggregate yes-share under postal and non-postal voting and correct for deviation between vox data and admin data
	*
	* votechoice_agg_[npv|pv]: sum over the six interest x income cells of
	* (cell share under the regime) x (cell yes share), times 100.

	foreach regime in npv pv {
		gen votechoice_agg_`regime' = ( share_low_low_`regime'*votechoice_low_low ///
			+ share_low_middle_`regime'*votechoice_low_middle ///
			+ share_low_high_`regime'*votechoice_low_high ///
			+ share_high_low_`regime'*votechoice_high_low ///
			+ share_high_middle_`regime'*votechoice_high_middle ///
			+ share_high_high_`regime'*votechoice_high_high )*100
	}

	* Deviation of the postal_byanr-weighted mix of the two simulated yes
	* shares from volkjaproz.
	gen diff_volkja_vox_real = (1-postal_byanr)*votechoice_agg_npv + postal_byanr*votechoice_agg_pv - volkjaproz

	* Shift both simulated yes shares by that deviation.
	foreach regime in npv pv {
		gen votechoice_agg_`regime'_corr = votechoice_agg_`regime' - diff_volkja_vox_real
	}

	*br anr titel volkjaproz volkjaproz_vox diff_volkja_vox_real votechoice_agg_*  votechoice_interest_low votechoice_interest_high share*

	* Consecutive numeric identifier for the votes.
	sort anr
	egen anr_num = group(anr)

	* Reduce to the simulation inputs, then merge data5.dta on again.
	* By default merge keeps rows found only in data5.dta and adds _merge.
	keep anr* year day month titel volkjaproz votechoice_agg_npv_corr votechoice_agg_pv_corr diff_volkja_vox_real
	merge m:1 anr using data5.dta

	* Save in the Stata 12 file format.
	saveold simulation_input_interest_and_income.dta, replace version(12)

	